Load packages

Bring in data

# read in catch data
data <- readRDS("data_clean/seine_djfmp_ybfmp.rds")
summary(data)
##    EventID            Location           RegionCode    StationCode       
##  Length:430205      Length:430205      Min.   :1.000   Length:430205     
##  Class :character   Class :character   1st Qu.:1.000   Class :character  
##  Mode  :character   Mode  :character   Median :2.000   Mode  :character  
##                                        Mean   :2.812                     
##                                        3rd Qu.:4.000                     
##                                        Max.   :7.000                     
##                                        NA's   :15340                     
##     Datetime                        SampleDate             Month       
##  Min.   :1995-01-04 09:40:00.00   Min.   :1995-01-04   Min.   : 1.000  
##  1st Qu.:2002-01-16 12:57:00.00   1st Qu.:2002-01-16   1st Qu.: 4.000  
##  Median :2007-10-12 08:25:00.00   Median :2007-10-12   Median : 6.000  
##  Mean   :2007-08-29 06:37:40.15   Mean   :2007-08-28   Mean   : 6.428  
##  3rd Qu.:2013-05-06 10:00:00.00   3rd Qu.:2013-05-06   3rd Qu.: 9.000  
##  Max.   :2019-12-31 12:13:00.00   Max.   :2019-12-31   Max.   :12.000  
##                                                                        
##       Jday        MethodCode        GearConditionCode WeatherCode       
##  Min.   :  1.0   Length:430205      Min.   :1.00      Length:430205     
##  1st Qu.: 99.0   Class :character   1st Qu.:1.00      Class :character  
##  Median :167.0   Mode  :character   Median :1.00      Mode  :character  
##  Mean   :180.1                      Mean   :1.04                        
##  3rd Qu.:273.0                      3rd Qu.:1.00                        
##  Max.   :366.0                      Max.   :2.00                        
##                                                                         
##        DO           WaterTemp       Turbidity           Secchi      
##  Min.   :  1.02   Min.   : 3.00   Min.   :   0.14   Min.   :0.0     
##  1st Qu.:  7.70   1st Qu.:12.20   1st Qu.:   7.75   1st Qu.:0.2     
##  Median :  8.77   Median :16.67   Median :  14.40   Median :0.2     
##  Mean   :  8.81   Mean   :16.44   Mean   :  25.04   Mean   :0.2     
##  3rd Qu.:  9.85   3rd Qu.:20.50   3rd Qu.:  29.60   3rd Qu.:0.3     
##  Max.   :159.60   Max.   :31.80   Max.   :1000.00   Max.   :1.2     
##  NA's   :266407   NA's   :6221    NA's   :298836    NA's   :416088  
##  SpecificConductance   FlowDebris     SiteDisturbance    AlternateSite     
##  Min.   :    0.58    Min.   : NA      Length:430205      Length:430205     
##  1st Qu.:  120.90    1st Qu.: NA      Class :character   Class :character  
##  Median :  164.20    Median : NA      Mode  :character   Mode  :character  
##  Mean   :  301.57    Mean   :NaN                                           
##  3rd Qu.:  345.00    3rd Qu.: NA                                           
##  Max.   :34000.00    Max.   : NA                                           
##  NA's   :107003      NA's   :430205                                        
##      Volume       IEPFishCode          ForkLength        CountAdj      
##  Min.   :  1.20   Length:430205      Min.   :  0.00   Min.   :  0.000  
##  1st Qu.: 26.95   Class :character   1st Qu.:  0.00   1st Qu.:  0.000  
##  Median : 42.00   Mode  :character   Median :  0.00   Median :  0.000  
##  Mean   : 47.99                      Mean   : 19.45   Mean   :  2.047  
##  3rd Qu.: 63.00                      3rd Qu.: 33.00   3rd Qu.:  1.000  
##  Max.   :945.00                      Max.   :800.00   Max.   :901.200  
##  NA's   :902                                                           
##    GearCode              Year        Program         
##  Length:430205      Min.   :1995   Length:430205     
##  Class :character   1st Qu.:2002   Class :character  
##  Mode  :character   Median :2007   Mode  :character  
##                     Mean   :2007                     
##                     3rd Qu.:2013                     
##                     Max.   :2019                     
## 
## Add region:
## DJFMP region codes:
## 1 = Sacramento River
## 2, 3, 4, 7 = Delta
## 6 = SF Bay (should not appear in the data at this point)
## 5 = San Joaquin

# Complete set of regions:
all_regions <- c("Sacramento","Yolo","Liberty_Island","Delta","San_Joaquin")

data <- data %>%
  mutate(Region = case_when(Program == "DJFMP" & RegionCode == 2 & 
                              grepl("^LI",StationCode) ~ "Liberty_Island", 
                            RegionCode %in% c(1) ~ "Sacramento",
                            RegionCode %in% c(2,3,4,7) ~ "Delta",
                            RegionCode %in% c(5) ~ "San_Joaquin",
                            grepl("YBFMP", EventID) ~ "Yolo"))

## Carry out subsetting that is common to all species data sets: 
data <- data %>%
  filter(!(is.na(Volume))) %>% # Removing samples without volume data
  select(Program, EventID, Location, RegionCode, Region, StationCode, 
         SampleDate, Datetime, Year, Month, Jday, GearConditionCode, 
         IEPFishCode, ForkLength, CountAdj, Volume, DO, WaterTemp, 
         Turbidity, Secchi, SpecificConductance) %>%
  mutate(IEPFishCode = as.character(IEPFishCode))

# Retrieve site coordinates for mapping:
djfmp_site_file <- contentid::resolve("hash://sha256/f0f9e66da7415df90a7c0ea4c01938ecadd71ad412af1ccc940e861e63e96ba7")
djfmp_sites0 <- read_csv(djfmp_site_file)
## Rows: 108 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): MethodCode, Location, StationCode
## dbl (2): Latitude, Longitude
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ybfmp_sites_file <- contentid::resolve("hash://sha256/acc9940abf5662e81ee594553e7dc46a05c4cace9c924dbf5352c0544bc7a481")
ybfmp_sites0 <- read_csv(ybfmp_sites_file)
## Rows: 26 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): StationCode, StationName, StationNumber, PeriodOfRecordFrom, Period...
## dbl (2): Latitude, Longitude
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
any(duplicated(djfmp_sites0$StationCode, ybfmp_sites0$StationCode)) # should be FALSE
## [1] FALSE
all_site_coordinates <- rbind(djfmp_sites0[ ,c("StationCode","Latitude","Longitude")],
                              ybfmp_sites0[ ,c("StationCode","Latitude","Longitude")])

Create species datasets and filter to appropriate months, sizes (Age-0)

## Missing lengths have the value 0. Filter those out here.

## SACPIK:

get_SACPIK_age0_data <- function(dat) {
  species_data <- dat %>%
    filter(IEPFishCode == "SACPIK")
  
  species_data_age0 <- species_data %>%
    filter(Month %in% c(6, 7)) %>%
    mutate(Age0_bool=(25 <= ForkLength & ForkLength <= 91)) %>%
    group_by(Program, EventID, Location, RegionCode, Region, StationCode, 
             SampleDate, Datetime, Year, Month, Jday, GearConditionCode, 
             Volume, DO, WaterTemp, Turbidity, Secchi, SpecificConductance) %>%
    summarise(TotalCountAdj_age0=sum(CountAdj[Age0_bool]),
                        .groups="drop") %>%
    mutate(CPUE = TotalCountAdj_age0/Volume,
           Volume_Z = scale(Volume),
           Julian_Z = scale(Jday),
           Julian_Sq_Z = scale(Jday^2))
  
  ## Double check that all fish are represented:
  tmp <- species_data %>%
    filter(Month %in% c(6, 7)) %>%
    mutate(Age0=(25 <= ForkLength & ForkLength <= 91))
  stopifnot(sum(species_data_age0$TotalCountAdj_age0) == sum(tmp$CountAdj[tmp$Age0]))
  
  return(species_data_age0)
}

data_SACPIK_age0 <- get_SACPIK_age0_data(data)


## SPLITT:

get_SPLITT_age0_data <- function(dat) {
  species_data <- data %>% 
    filter(IEPFishCode == "SPLITT")
  
  species_data_age0 <- species_data %>%
    filter(Month %in% c(5, 6)) %>%
    mutate(Age0=(25 <= ForkLength & ForkLength <= 118)) %>%
    group_by(Program, EventID, Location, RegionCode, Region, StationCode, 
             SampleDate, Datetime, Year, Month, Jday, GearConditionCode, 
             Volume, DO, WaterTemp, Turbidity, Secchi, SpecificConductance) %>%
    summarise(TotalCountAdj_age0=sum(CountAdj[Age0]),
                        .groups="drop") %>%
    mutate(CPUE = TotalCountAdj_age0/Volume,
           Volume_Z = scale(Volume),
           Julian_Z = scale(Jday),
           Julian_Sq_Z = scale(Jday^2))
  
  ## Double check that all fish are represented:
  tmp <- species_data %>%
    filter(Month %in% c(5, 6)) %>%
    mutate(Age0=(25 <= ForkLength & ForkLength <= 118))
  stopifnot(sum(species_data_age0$TotalCountAdj_age0) == sum(tmp$CountAdj[tmp$Age0]))
  
  return(species_data_age0)
}

data_SPLITT_age0 <- get_SPLITT_age0_data(data)


## SASCUC:

get_SACSUC_age0_data <- function(dat) {
  species_data <- data %>% 
    filter(IEPFishCode == "SACSUC")
  
  species_data_age0 <- species_data %>%
    filter(Month %in% c(5, 6)) %>%
    mutate(Age0=(25 <= ForkLength & ForkLength <= 86)) %>%
    group_by(Program, EventID, Location, RegionCode, Region, StationCode, 
             SampleDate, Datetime, Year, Month, Jday, GearConditionCode, 
             Volume, DO, WaterTemp, Turbidity, Secchi, SpecificConductance) %>%
    summarise(TotalCountAdj_age0=sum(CountAdj[Age0]),
                        .groups="drop") %>%
    mutate(CPUE = TotalCountAdj_age0/Volume,
           Volume_Z = scale(Volume),
           Julian_Z = scale(Jday),
           Julian_Sq_Z = scale(Jday^2))
  
  ## Double check that all fish are represented:
  tmp <- species_data %>%
    filter(Month %in% c(5, 6)) %>%
    mutate(Age0=(25 <= ForkLength & ForkLength <= 86))
  stopifnot(sum(species_data_age0$TotalCountAdj_age0) == sum(tmp$CountAdj[tmp$Age0]))
  
  return(species_data_age0)
}

data_SACSUC_age0 <- get_SACSUC_age0_data(data)


## COMCAR:

get_COMCAR_age0_data <- function(dat) {
  species_data <- data %>% 
    filter(IEPFishCode == "COMCAR")

  species_data_age0 <- species_data %>%
    filter(Month %in% c(5, 6)) %>%
    mutate(Age0=(25 <= ForkLength & ForkLength <= 164)) %>%
    group_by(Program, EventID, Location, RegionCode, Region, StationCode, 
             SampleDate, Datetime, Year, Month, Jday, GearConditionCode, 
             Volume, DO, WaterTemp, Turbidity, Secchi, SpecificConductance) %>%
    summarise(TotalCountAdj_age0=sum(CountAdj[Age0]),
                        .groups="drop") %>%
    mutate(CPUE = TotalCountAdj_age0/Volume,
           Volume_Z = scale(Volume),
           Julian_Z = scale(Jday),
           Julian_Sq_Z = scale(Jday^2))
  
  ## Double check that all fish are represented:
  tmp <- species_data %>%
    filter(Month %in% c(5, 6)) %>%
    mutate(Age0=(25 <= ForkLength & ForkLength <= 164))
  stopifnot(sum(species_data_age0$TotalCountAdj_age0) == sum(tmp$CountAdj[tmp$Age0]))
  
  return(species_data_age0)
}

data_COMCAR_age0 <- get_COMCAR_age0_data(data)


## REDSHI:

get_REDSHI_age0_data <- function(dat) {
  species_data <- data %>% 
    filter(IEPFishCode == "REDSHI")

  species_data_age0 <- species_data %>%
    filter(Month %in% c(3, 4)) %>%
    mutate(Age0=(25 <= ForkLength & ForkLength <= 49)) %>%
    group_by(Program, EventID, Location, RegionCode, Region, StationCode, 
             SampleDate, Datetime, Year, Month, Jday, GearConditionCode, 
             Volume, DO, WaterTemp, Turbidity, Secchi, SpecificConductance) %>%
    summarise(TotalCountAdj_age0=sum(CountAdj[Age0]),
                        .groups="drop") %>%
    mutate(CPUE = TotalCountAdj_age0/Volume,
           Volume_Z = scale(Volume),
           Julian_Z = scale(Jday),
           Julian_Sq_Z = scale(Jday^2))
  
  ## Double check that all fish are represented:
  tmp <- species_data %>%
    filter(Month %in% c(3, 4)) %>%
    mutate(Age0=(25 <= ForkLength & ForkLength <= 49))
  stopifnot(sum(species_data_age0$TotalCountAdj_age0) == sum(tmp$CountAdj[tmp$Age0]))
  
  return(species_data_age0)
}

data_REDSHI_age0 <- get_REDSHI_age0_data(data)


## GOLDSHI:

get_GOLDSHI_age0_data <- function(dat) {
  species_data <- data %>% 
    filter(IEPFishCode == "GOLDSHI")

  species_data_age0 <- species_data %>%
    filter(Month %in% c(6, 7)) %>%
    mutate(Age0=(25 <= ForkLength & ForkLength <= 65)) %>%
    group_by(Program, EventID, Location, RegionCode, Region, StationCode, 
             SampleDate, Datetime, Year, Month, Jday, GearConditionCode, 
             Volume, DO, WaterTemp, Turbidity, Secchi, SpecificConductance) %>%
    summarise(TotalCountAdj_age0=sum(CountAdj[Age0]),
                        .groups="drop") %>%
    mutate(CPUE = TotalCountAdj_age0/Volume,
           Volume_Z = scale(Volume),
           Julian_Z = scale(Jday),
           Julian_Sq_Z = scale(Jday^2))
  
  ## Double check that all fish are represented:
  tmp <- species_data %>%
    filter(Month %in% c(6, 7)) %>%
    mutate(Age0=(25 <= ForkLength & ForkLength <= 65))
  stopifnot(sum(species_data_age0$TotalCountAdj_age0) == sum(tmp$CountAdj[tmp$Age0]))
  
  return(species_data_age0)
}

data_GOLDSHI_age0 <- get_GOLDSHI_age0_data(data)

Plot all stations represented in the age-0 data sets

## Check region designations by mapping:
stopifnot(sum(is.na(data_SACPIK_age0$Region)) == 0)
stopifnot(sum(is.na(data_SPLITT_age0$Region)) == 0)
stopifnot(sum(is.na(data_SACSUC_age0$Region)) == 0)
stopifnot(sum(is.na(data_COMCAR_age0$Region)) == 0)
stopifnot(sum(is.na(data_REDSHI_age0$Region)) == 0)
stopifnot(sum(is.na(data_GOLDSHI_age0$Region)) == 0)

## Compile unique sites represented in the final data sets and check region assignments:
uniq_sites_in_data <- do.call("rbind", list(data_SACPIK_age0, 
                                            data_SPLITT_age0,
                                            data_SACSUC_age0,
                                            data_COMCAR_age0,
                                            data_REDSHI_age0,
                                            data_GOLDSHI_age0)) %>%
  distinct(Program, Location, StationCode, Region) %>%
  left_join(all_site_coordinates, by="StationCode") %>%
  mutate(Region_f = factor(Region, levels=all_regions, ordered=TRUE))

pal <- leaflet::colorFactor(viridis::turbo(256), uniq_sites_in_data$Region_f)

uniq_sites_in_data %>%
  leaflet::leaflet() %>%
  leaflet::addTiles() %>%
  leaflet::addCircleMarkers(
    color = ~pal(Region_f),
    stroke = FALSE,
    fillOpacity = 1,
    radius = 4, 
    lng = ~Longitude,
    lat = ~Latitude,
    label = ~StationCode) %>%
  leaflet::addLegend(
    title = "Region",
    pal = pal, 
    values = ~Region_f,
    position = "bottomright",
    opacity = 1)

Summary of sampling effort

createEffortPlot <- function(dat) {
  effort_summary <- dat %>%
    mutate(#Year_f=factor(Year, levels=sort(unique(Year)), ordered=TRUE),
           Month_f=factor(Month),
           Region_f = factor(Region, levels=all_regions, ordered=TRUE)) %>%
    group_by(Year, Month_f, Region_f, .drop=FALSE) %>%
    summarise(NumOfUniqSites=length(unique(StationCode)),
              NumOfSamples=dplyr::n(),
              .groups="drop") %>%
    mutate(#Year=paste0("'",substr(as.character(Year_f),3,4)),
           Month_num=as.numeric(as.character(Month_f)), 
           Month=factor(Month_num, levels=sort(unique(Month_num)),
                        labels=month.name[sort(unique(Month_num))], ordered=TRUE))
  
  ret <- ggplot(effort_summary) + 
    geom_col(aes(x=Year, y=NumOfSamples)) + 
    geom_text(aes(x=Year, y=140, label=NumOfUniqSites), size=2.5, col="blue") + 
    facet_grid(rows=vars(Region_f), cols=vars(Month)) + 
    scale_x_continuous(minor_breaks=seq(1995, 2019, by=1))
  
  plot(ret)
}

p_effort_SACPIK <- createEffortPlot(data_SACPIK_age0) + ggtitle("Pikeminnow")

p_effort_SPLITT <- createEffortPlot(data_SPLITT_age0) + ggtitle("Splittail")

p_effort_SACSUC <- createEffortPlot(data_SACSUC_age0) + ggtitle("Sucker")

p_effort_COMCAR <- createEffortPlot(data_COMCAR_age0) + ggtitle("Carp")

p_effort_REDSHI <- createEffortPlot(data_REDSHI_age0) + ggtitle("Red shiner")

p_effort_GOLDSHI <- createEffortPlot(data_GOLDSHI_age0) + ggtitle("Golden shiner")

pdf("Figures/effort_figures.pdf", onefile=TRUE, width=10)
  p_effort_SACPIK
  p_effort_SPLITT
  p_effort_SACSUC
  p_effort_COMCAR
  p_effort_REDSHI
  p_effort_GOLDSHI
dev.off()
## png 
##   2

Calculate Regional and System Wide Indices

calculateIndices <- function(dat) {
  ## Require that all selected months be present to have a non-missing index.
  ## Assume that all selected months are represented in dat and use tidyr::complete().

  ret <- dat %>%
    group_by(StationCode, Year, Month, Region) %>% 
    summarise(CPUE = mean(CPUE), 
              .groups="drop") %>% 
    group_by(Year, Month, Region) %>%
    summarise(CPUE = mean(CPUE), 
              .groups="drop") %>% 
    tidyr::complete(Year, Month, Region) %>%
    group_by(Year, Region) %>% 
    summarise(Index = mean(CPUE), 
              .groups="drop") %>%
    mutate(log_Index=log(Index + 0.00001)) %>%
    tidyr::pivot_wider(id_cols=Year,
                       names_from=Region,
                       values_from=c(Index, log_Index)) %>%
    mutate(Index_Watershed=Index_Delta + Index_Sacramento + 
                              Index_San_Joaquin + Index_Yolo + Index_Liberty_Island)

  return(ret)
}

SACPIK_Index_Final <- calculateIndices(data_SACPIK_age0)
SPLITT_Index_Final <- calculateIndices(data_SPLITT_age0)
SACSUC_Index_Final <- calculateIndices(data_SACSUC_age0)
COMCAR_Index_Final <- calculateIndices(data_COMCAR_age0)
REDSHI_Index_Final <- calculateIndices(data_REDSHI_age0)
GOLDSHI_Index_Final <- calculateIndices(data_GOLDSHI_age0)


## Figures:
plotIndices <- function(index_dat) {
  ret <- index_dat %>%
    dplyr::select(-c(log_Index_Delta, log_Index_Liberty_Island, log_Index_Sacramento,
                     log_Index_San_Joaquin, log_Index_Yolo)) %>%
    tidyr::pivot_longer(cols=-Year, names_to="Region", values_to="Index",
                        names_prefix="Index_") %>%
    dplyr::mutate(Region_f=factor(Region, levels=c(all_regions,"Watershed"), 
                                  ordered=TRUE)) %>%
    ggplot() + 
      geom_col(aes(x=Year, y=Index)) + 
      facet_grid(rows=vars(Region_f), scales="free_y") + 
      #scale_y_continuous(limits=c(0, NA), expand=c(0,0)) + 
      scale_x_continuous(breaks=seq(1995, 2019, by=2))
  
  return(ret)
}

p_SACPIK <- plotIndices(SACPIK_Index_Final) + ggtitle("Pikeminnow")
p_SACPIK 

p_SPLITT <- plotIndices(SPLITT_Index_Final) + ggtitle("Splittail")
p_SPLITT

p_SACSUC <- plotIndices(SACSUC_Index_Final) + ggtitle("Sucker")
p_SACSUC

p_COMCAR <- plotIndices(COMCAR_Index_Final) + ggtitle("Carp")
p_COMCAR

p_REDSHI <- plotIndices(REDSHI_Index_Final) + ggtitle("Red shiner")
p_REDSHI

p_GOLDSHI <- plotIndices(GOLDSHI_Index_Final) + ggtitle("Golden shiner")
p_GOLDSHI

pdf("Figures/index_figures.pdf", onefile=TRUE)
  p_SACPIK
  p_SPLITT
  p_SACSUC
  p_COMCAR
  p_REDSHI
  p_GOLDSHI
dev.off()
## png 
##   2

Environmental covariate time periods, based on 25-75th percentile of spawning window:

Sacramento Pikeminnow: May 1 - June 30

Splittail: February 22 - June 8

Sacramento sucker: March 10 - May 23

Common Carp: April 30 - June 30

Red Shiner: May 9 - July 24

Golden Shiner: April 30 - June 30

environ_date_lookup <- data.frame(
  species=c("SACPIK","SPLITT","SACSUC","COMCAR","REDSHI","GOLDSHI"),
  min_day=c("05-01","02-22","03-10","04-30","05-09","04-30"),
  max_day=c("06-30","06-08","05-23","06-30","07-24","06-30")
)

Discharge covariates

## Discharge data is taken DNR Day Flow data
# SJ = SJR
# Sac = SAC - not going to use YOLO because the sites are upstream of the bypass
# Delta = OUT 

## Timing of flows:
## Centroid
# Looking at timing of flow by julian day in each region
# Formula taken from Sommer et al. 2011
# PF = sum(Julian day * flow)/sum(flow)
# Going to calculate timing of flow for Jan - May because peak spawning for all species should have started by May and high flows earlier may have allowed them to reach the spawning grounds


Discharge <- read.csv("data/dayflow_1994_2019.csv") %>%
  filter(Year %in% 1995:2019) %>%
  mutate(Date=as.Date(Date, "%d-%b-%y"),
         Julian_Date=as.POSIXlt(Date)$yday + 1)


# Variables were created below anticipating reviewers thinking about flow earlier that Jan

# Made a variable for water year w/ Oct as month 1
# Then did a julian date with Oct 1 as 1
# Did case when due to leap years

# This needs to be checked before it is used
Discharge_WY <- Discharge %>%
  mutate(Water_Year=ifelse(Mo >= 10, Year + 1, Year),
         WaterYearStartDate=as.Date(paste0(Water_Year - 1,"-10-01")), 
         Julian_Date_Water_Year=as.numeric(Date - WaterYearStartDate) + 1)


calculateMeanFlowAndTiming <- function(discharge_dat, plot_opt=TRUE) {
  species <- unique(discharge_dat$species)
  spawning_window_min_day <- unique(discharge_dat$min_day)
  spawning_window_max_day <- unique(discharge_dat$max_day)
  
  ## sw = "spawning window"
  sw_flow_wide <- discharge_dat %>%
    dplyr::mutate(ref_date_min=as.Date(paste(Year,min_day, sep="-")),
                  ref_date_max=as.Date(paste(Year,max_day, sep="-"))) %>%
    dplyr::filter(ref_date_min <= Date & Date <= ref_date_max) %>%
    dplyr::select(Julian_Date, Year, Date, ref_date_min, ref_date_max, 
                  SJR, SAC, OUT, YOLO) %>%
    dplyr::group_by(Year) %>% 
    dplyr::reframe(Julian_Date, Year, Date, ref_date_min, ref_date_max,
                   SJR, SAC, OUT, YOLO,
                   ## Save here for optional plotting later:
                   scaled_SJ=SJR/sum(SJR),
                   scaled_Sac=SAC/sum(SAC),
                   scaled_Delta=OUT/sum(OUT),
                   scaled_Yolo=YOLO/sum(YOLO))
  
  sw_flow_summary_wide <- sw_flow_wide %>%
    dplyr::group_by(Year) %>% 
    dplyr::summarise(
      MeanFlow_SJ = mean(SJR),
      MeanFlow_Sac = mean(SAC),
      MeanFlow_Delta = mean(OUT),
      MeanFlow_Yolo = mean(YOLO),
      
      ## Time of flow within the selected spawning window:
      FlowTiming_SJ = sum(Julian_Date * scaled_SJ),
      FlowTiming_Sac = sum(Julian_Date * scaled_Sac),
      FlowTiming_Delta = sum(Julian_Date * (scaled_Delta)),
      FlowTiming_Yolo = sum(Julian_Date * (scaled_Yolo)),
      .groups="drop")
  
  ## Optional plotting:
  if(plot_opt) {
    scaled_sw_flow_long <- sw_flow_wide %>%
      dplyr::select(Year, Julian_Date, Date, ref_date_min, ref_date_max, 
                    scaled_SJ, scaled_Sac, scaled_Delta, scaled_Yolo) %>%
      tidyr::pivot_longer(cols=c(scaled_SJ, scaled_Sac, scaled_Delta, scaled_Yolo),
                          names_to="Region",
                          values_to="ScaledFlow",
                          names_prefix="scaled_")
    
    flow_timing_long_tmp <- sw_flow_summary_wide %>%
      dplyr::select(Year, FlowTiming_SJ, FlowTiming_Sac, FlowTiming_Delta, 
                    FlowTiming_Yolo) %>%
      tidyr::pivot_longer(cols=-Year,
                          names_to="Region",
                          values_to="FlowTimingJulianDay",
                          names_prefix="FlowTiming_")
    
    tmp_interpolation_df <- scaled_sw_flow_long %>%
      dplyr::left_join(flow_timing_long_tmp, by=c("Year","Region"))
    
    tmp_interpolation_list <- split(tmp_interpolation_df, f= ~ Region + Year)
    
    flow_timing_long <- do.call("rbind", lapply(tmp_interpolation_list, function(dat) {
      dat$ScaledFlowInterpolated <- approx(x=dat$Julian_Date, y=dat$ScaledFlow, 
                                           xout=unique(dat$FlowTimingJulianDay), 
                                           method="linear")$y
      ret <- dat %>%
        dplyr::select(Year, Region, FlowTimingJulianDay, ScaledFlowInterpolated) %>%
        dplyr::distinct()
      
      return(ret)
    }))
    
    p <- ggplot(scaled_sw_flow_long) + 
      geom_line(aes(x=Julian_Date, y=ScaledFlow, col=Region)) + 
      facet_wrap( ~ Year, scales="free_y") + 
      scale_y_continuous(limits=c(0,NA)) + 
      geom_segment(data=flow_timing_long, 
                   aes(x=FlowTimingJulianDay, xend=FlowTimingJulianDay,
                       y=0, yend=ScaledFlowInterpolated), col="black") + 
      geom_point(data=flow_timing_long, 
                 aes(x=FlowTimingJulianDay, y=ScaledFlowInterpolated, fill=Region, 
                     col=Region, pch=Region),
                 size=2) + 
      theme_bw() +
      labs(x="Spawning window Julian date", y="Scaled flow: daily_flow/sum(daily_flow)",
           title=paste0(species, "; using spawning window: ",spawning_window_min_day, 
                        " through ", spawning_window_max_day)) + 
      scale_shape_manual(values=c(21,22,24,25))
    print(p)
  }

  return(sw_flow_summary_wide)
}

SACPIK_Discharge <- Discharge %>%
  dplyr::mutate(species="SACPIK") %>%
  dplyr::left_join(environ_date_lookup, by="species") %>%
  calculateMeanFlowAndTiming()

SPLITT_Discharge <- Discharge %>% 
  dplyr::mutate(species="SPLITT") %>%
  dplyr::left_join(environ_date_lookup, by="species") %>%
  calculateMeanFlowAndTiming()

SACSUC_Discharge <- Discharge %>% 
  dplyr::mutate(species="SACSUC") %>%
  dplyr::left_join(environ_date_lookup, by="species") %>%
  calculateMeanFlowAndTiming()

COMCAR_Discharge <- Discharge %>% 
  dplyr::mutate(species="COMCAR") %>%
  dplyr::left_join(environ_date_lookup, by="species") %>%
  calculateMeanFlowAndTiming()

REDSHI_Discharge <- Discharge %>% 
  dplyr::mutate(species="REDSHI") %>%
  dplyr::left_join(environ_date_lookup, by="species") %>%
  calculateMeanFlowAndTiming()

GOLDSHI_Discharge <- Discharge %>% 
  dplyr::mutate(species="GOLDSHI") %>%
  dplyr::left_join(environ_date_lookup, by="species") %>%
  calculateMeanFlowAndTiming()

# ## Timing of flow by year:
# Flow_Timing_Y <- Discharge %>% 
#   filter(Mo %in% c(1:5)) %>% 
#   group_by(Year) %>% 
#   summarise(FlowTiming_SJ = sum(Julian_Date * SJR)/sum(SJR),
#             FlowTiming_Sac = sum(Julian_Date * SAC)/sum(SAC),
#             FlowTiming_Delta = sum(Julian_Date * (OUT))/sum(OUT),
#             FlowTiming_Yolo = sum(Julian_Date * (YOLO))/sum(YOLO))
# 
# ## Timing of flow by water year:
# Flow_Timing_WY <- Discharge_WY %>% 
#   filter(Mo %in% c(10:12, 1:5)) %>% 
#   group_by(Water_Year) %>% 
#   summarise(FlowTimingWY_SJ = sum(Julian_Date_Water_Year * SJR)/sum(SJR),
#             FlowTimingWY_Sac = sum(Julian_Date_Water_Year * SAC)/sum(SAC),
#             FlowTimingWY_Delta = sum(Julian_Date_Water_Year * (OUT))/sum(OUT),
#             FlowTiming_Yolo = sum(Julian_Date * (YOLO))/sum(YOLO))
# 
# head(Flow_Timing_Y)
# head(Flow_Timing_WY)

Water temperature covariates

calculateMeanWaterTemp <- function(survey_dat) {
  ## Similar to calculateIndices() ...
  ## Require that all selected months be present to have a non-missing index.
  ## Assume that all selected months are represented in dat and use tidyr::complete().

  ret <-  survey_dat %>%
    dplyr::mutate(ref_date_min=as.Date(paste(Year,min_day, sep="-")),
                  ref_date_max=as.Date(paste(Year,max_day, sep="-"))) %>%
    dplyr::filter(ref_date_min <= SampleDate & SampleDate <= ref_date_max) %>%
    group_by(StationCode, Year, Region) %>% 
    summarise(Mean_WT = mean(WaterTemp, na.rm=TRUE),
              .groups="drop") %>% 
    group_by(Year, Region) %>%
    summarise(Mean_WT = mean(Mean_WT),
              .groups="drop") %>%
    tidyr::complete(Year, Region) %>%
    tidyr::pivot_wider(id_cols=Year,
                       names_from=Region,
                       values_from=Mean_WT,
                       names_prefix="MeanWT_")

  return(ret)
}

SACPIK_MeanWT <- data %>%
  dplyr::mutate(species="SACPIK") %>%
  dplyr::left_join(environ_date_lookup, by="species") %>%
  calculateMeanWaterTemp()

SPLITT_MeanWT <- data %>%
  dplyr::mutate(species="SPLITT") %>%
  dplyr::left_join(environ_date_lookup, by="species") %>%
  calculateMeanWaterTemp()

SACSUC_MeanWT <- data %>%
  dplyr::mutate(species="SACSUC") %>%
  dplyr::left_join(environ_date_lookup, by="species") %>%
  calculateMeanWaterTemp()

COMCAR_MeanWT <- data %>%
  dplyr::mutate(species="COMCAR") %>%
  dplyr::left_join(environ_date_lookup, by="species") %>%
  calculateMeanWaterTemp()

REDSHI_MeanWT <- data %>%
  dplyr::mutate(species="REDSHI") %>%
  dplyr::left_join(environ_date_lookup, by="species") %>%
  calculateMeanWaterTemp()

GOLDSHI_MeanWT <- data %>%
  dplyr::mutate(species="GOLDSHI") %>%
  dplyr::left_join(environ_date_lookup, by="species") %>%
  calculateMeanWaterTemp()

Compile indices and covariates

addStandardizedCovariates <- function(x) {
  all_names <- names(x)
  covar_names <- all_names[all_names != "Year"]
  
  for(this_col in covar_names) {
    x[ ,paste0(this_col,"_stand")] <- scale(x[ ,this_col])
  }
  return(x)
}

## SACPIK:
SACPIK_Covariates <- dplyr::full_join(SACPIK_Discharge, SACPIK_MeanWT, by="Year")
SACPIK_Covariates <- addStandardizedCovariates(SACPIK_Covariates)

SACPIK_final <- dplyr::left_join(SACPIK_Index_Final,
                                 SACPIK_Covariates,
                                 by="Year")

## SPLITT:
SPLITT_Covariates <- dplyr::full_join(SPLITT_Discharge, SPLITT_MeanWT, by="Year")                      
SPLITT_Covariates <- addStandardizedCovariates(SPLITT_Covariates)

SPLITT_final <- dplyr::left_join(SPLITT_Index_Final,
                                 SPLITT_Covariates,
                                 by="Year")

## SACSUC:
SACSUC_Covariates <- dplyr::full_join(SACSUC_Discharge, SACSUC_MeanWT, by="Year")                                    
SACSUC_Covariates <- addStandardizedCovariates(SACSUC_Covariates)

SACSUC_final <- dplyr::left_join(SACSUC_Index_Final,
                                 SACSUC_Covariates,
                                 by="Year")

## COMCAR:
COMCAR_Covariates <- dplyr::full_join(COMCAR_Discharge, COMCAR_MeanWT, by="Year")
COMCAR_Covariates <- addStandardizedCovariates(COMCAR_Covariates)

COMCAR_final <- dplyr::left_join(COMCAR_Index_Final,
                                 COMCAR_Covariates,
                                 by="Year")

## REDSHI:
REDSHI_Covariates <- dplyr::full_join(REDSHI_Discharge, REDSHI_MeanWT, by="Year")         
REDSHI_Covariates <- addStandardizedCovariates(REDSHI_Covariates)

REDSHI_final <- dplyr::left_join(REDSHI_Index_Final,
                                 REDSHI_Covariates,
                                 by="Year")

## GOLDSHI:
GOLDSHI_Covariates <- dplyr::full_join(GOLDSHI_Discharge, GOLDSHI_MeanWT, by="Year")
GOLDSHI_Covariates <- addStandardizedCovariates(GOLDSHI_Covariates)

GOLDSHI_final <- dplyr::left_join(GOLDSHI_Index_Final,
                                  GOLDSHI_Covariates,
                                  by="Year")

Plots

plot_corr <- function(dat, species, region) {
  stopifnot(region %in% c("Delta","Liberty Island","Yolo","Sacramento","San Joaquin"))
  
  if(region == "Delta") {
    use_dat <- dat[ ,c("Index_Delta","MeanFlow_Delta_stand",
                       "FlowTiming_Delta_stand","MeanWT_Delta_stand")]
    
  } else if(region == "Liberty Island") {
    ## Using Yolo flow for Liberty Island??
    use_dat <- dat[ ,c("Index_Liberty_Island","MeanFlow_Yolo_stand",
                       "FlowTiming_Yolo_stand","MeanWT_Liberty_Island_stand")]
    
  } else if(region == "Yolo") {
    use_dat <- dat[ ,c("Index_Yolo","MeanFlow_Yolo_stand",
                       "FlowTiming_Yolo_stand","MeanWT_Yolo_stand")]
    
  } else if(region == "Sacramento") {
    use_dat <- dat[ ,c("Index_Sacramento","MeanFlow_Sac_stand",
                       "FlowTiming_Sac_stand","MeanWT_Sacramento_stand")]
    
  } else if(region == "San Joaquin") {
    use_dat <- dat[ ,c("Index_San_Joaquin","MeanFlow_SJ_stand",
                       "FlowTiming_SJ_stand","MeanWT_San_Joaquin_stand")]
    
  }

  pairs(use_dat, 
        lower.panel=function(x, y) {
           text(x, y, paste0("'",substr(dat$Year, 3, 4)))
         },
         upper.panel=function(x, y) {
           text(mean(range(x, na.rm=TRUE)), mean(range(y, na.rm=TRUE)),
                round(cor(x, y, use="complete.obs"), digits=2), col="red", cex=2)
         }, main=paste(species, region, sep=" - "))
}


plot_corr(SACPIK_final, species="Pikeminnow", region="Delta")

plot_corr(SPLITT_final, species="Splittail", region="Delta")

plot_corr(SACSUC_final, species="Sucker", region="Delta")

plot_corr(COMCAR_final, species="Carp", region="Delta")

plot_corr(REDSHI_final, species="Red shiner", region="Delta")

plot_corr(GOLDSHI_final, species="Golden shiner", region="Delta")

plot_corr(SACPIK_final, species="Pikeminnow", region="Liberty Island")

plot_corr(SPLITT_final, species="Splittail", region="Liberty Island")

plot_corr(SACSUC_final, species="Sucker", region="Liberty Island")

plot_corr(COMCAR_final, species="Carp", region="Liberty Island")

plot_corr(REDSHI_final, species="Red shiner", region="Liberty Island")

plot_corr(GOLDSHI_final, species="Golden shiner", region="Liberty Island")

plot_corr(SACPIK_final, species="Pikeminnow", region="Yolo")

plot_corr(SPLITT_final, species="Splittail", region="Yolo")

plot_corr(SACSUC_final, species="Sucker", region="Yolo")

plot_corr(COMCAR_final, species="Carp", region="Yolo")

plot_corr(REDSHI_final, species="Red shiner", region="Yolo")

plot_corr(GOLDSHI_final, species="Golden shiner", region="Yolo")

plot_corr(SACPIK_final, species="Pikeminnow", region="Sacramento")

plot_corr(SPLITT_final, species="Splittail", region="Sacramento")

plot_corr(SACSUC_final, species="Sucker", region="Sacramento")

plot_corr(COMCAR_final, species="Carp", region="Sacramento")

plot_corr(REDSHI_final, species="Red shiner", region="Sacramento")

plot_corr(GOLDSHI_final, species="Golden shiner", region="Sacramento")

plot_corr(SACPIK_final, species="Pikeminnow", region="San Joaquin")

plot_corr(SPLITT_final, species="Splittail", region="San Joaquin")

plot_corr(SACSUC_final, species="Sucker", region="San Joaquin")

plot_corr(COMCAR_final, species="Carp", region="San Joaquin")

plot_corr(REDSHI_final, species="Red shiner", region="San Joaquin")

plot_corr(GOLDSHI_final, species="Golden shiner", region="San Joaquin")

Save final data sets

write.csv(SACPIK_final, "data_clean/SACPIK_index.csv", row.names=FALSE)
write.csv(SPLITT_final, "data_clean/SPLITT_index.csv", row.names=FALSE)
write.csv(SACSUC_final, "data_clean/SACSUC_index.csv", row.names=FALSE)
write.csv(COMCAR_final, "data_clean/COMCAR_index.csv", row.names=FALSE)
write.csv(REDSHI_final, "data_clean/REDSHI_index.csv", row.names=FALSE)
write.csv(GOLDSHI_final, "data_clean/GOLDSHI_index.csv", row.names=FALSE)

Calculate Center of Distribution: Line 745

# SACSUC_RM <- 
#   data_SACSUC_age0 %>% 
#   left_join(Site_RM, 
#             by = "StationCode") %>% 
#   filter(CPUE != 0) # Removing records where CPUE was 0. This will prevent NAs in SJ when there were years w/ 0 catch (OW 0 CPUE shouldn't matter)
# 
# # Sacramento River
# SACSUC_RM_Sac <-
#   SACSUC_RM %>% 
#   filter(grepl("SR", StationCode)) # Selecting stations that begin w/ SR
# head(SACSUC_RM_Sac)

# # SJ River
# SACSUC_RM_SJ <-
#   SACSUC_RM %>% 
#   filter(grepl("SJ", StationCode)) # Selecting stations that begin w/ SJ
# head(SACSUC_RM_SJ)

# SACSUC_CDist_SAC <- 
#   SACSUC_RM_Sac %>% 
#   filter(Year %in% c(1995:2019)) %>% 
#   group_by(Year) %>% 
#   summarise(CDist_SAC = sum(RiverKilo * CPUE)/sum(CPUE))
# head(SACSUC_CDist_SAC)

# SACSUC_Month_CDist_SAC <- 
#   SACSUC_RM_Sac %>% 
#   filter(Year %in% c(1995:2019)) %>% 
#   group_by(Year, 
#            Month) %>% 
#   summarise(CDist_SAC = sum(RiverKilo * CPUE)/sum(CPUE))
# head(SACSUC_Month_CDist_SAC)
# 
# SACSUC_CDist_SJ <- 
#   SACSUC_RM_SJ %>% 
#   filter(Year %in% c(1995:2019)) %>% 
#   group_by(Year) %>% 
#   summarise(CDist_SJ = sum(RiverKilo * CPUE)/sum(CPUE))
# head(SACSUC_CDist_SJ)

# SACSUC_CDist <- 
#   SACSUC_CDist_SAC %>% 
#   left_join(SACSUC_CDist_SJ,
#             by = "Year")
# head(SACSUC_CDist)